From 4a33b9f396dc7d5d233c1bfd8353b2e990ee37a4 Mon Sep 17 00:00:00 2001 From: "akw27@labyrinth.cl.cam.ac.uk" Date: Tue, 8 Feb 2005 14:47:09 +0000 Subject: [PATCH] bitkeeper revision 1.1159.249.1 (4208d0edi9hSgBaQl0hrDcYiEH0yxg) blktap driver fixes: Allow the tap to terminate a block device (not need a backend). Allow it to run in dom0. Forward control messages to user space as well. Signed-off-by: akw27@cl.cam.ac.uk --- .../drivers/xen/blktap/blktap.c | 7 +- .../drivers/xen/blktap/blktap.h | 19 ++-- .../drivers/xen/blktap/blktap_controlmsg.c | 43 +++++---- .../drivers/xen/blktap/blktap_datapath.c | 34 ++++--- .../drivers/xen/blktap/blktap_userdev.c | 96 +++++++++++++++---- 5 files changed, 141 insertions(+), 58 deletions(-) diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c index e4fbf390bc..a9a00677bc 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c @@ -16,7 +16,7 @@ #include "blktap.h" -int __init xlblk_init(void) +int __init xlblktap_init(void) { ctrl_msg_t cmsg; blkif_fe_driver_status_t fe_st; @@ -64,6 +64,7 @@ int __init xlblk_init(void) return 0; } +#if 0 /* tap doesn't handle suspend/resume */ void blkdev_suspend(void) { } @@ -81,6 +82,6 @@ void blkdev_resume(void) memcpy(cmsg.msg, &st, sizeof(st)); ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); } +#endif - -__initcall(xlblk_init); +__initcall(xlblktap_init); diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h index 2d67d592fc..e851cc72af 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h @@ -48,6 +48,12 @@ #define WPRINTK(fmt, args...) 
printk(KERN_WARNING "blk_tap: " fmt, ##args) +/* -------[ state descriptors ]--------------------------------------- */ + +#define BLKIF_STATE_CLOSED 0 +#define BLKIF_STATE_DISCONNECTED 1 +#define BLKIF_STATE_CONNECTED 2 + /* -------[ connection tracking ]------------------------------------- */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) @@ -99,7 +105,6 @@ typedef struct { unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int next_free; - int inuse; /* debugging */ } active_req_t; typedef unsigned int ACTIVE_RING_IDX; @@ -181,7 +186,7 @@ extern unsigned long mmap_vstart; * for shared memory rings. */ -#define RING_PAGES 128 +#define RING_PAGES 3 /* Ctrl, Front, and Back */ extern unsigned long rings_vstart; @@ -190,11 +195,10 @@ extern unsigned long blktap_mode; /* Connection to a single backend domain. */ extern blkif_front_ring_t blktap_be_ring; +extern unsigned int blktap_be_evtchn; +extern unsigned int blktap_be_state; -/* Event channel to backend domain. */ -extern unsigned int blkif_ptbe_evtchn; - -/* User ring status... this will soon vanish into a ring struct. */ +/* User ring status. */ extern unsigned long blktap_ring_ok; /* -------[ ...and function prototypes. 
]----------------------------- */ @@ -213,8 +217,7 @@ void blktap_kick_user(void); /* user ring access functions: */ int blktap_write_fe_ring(blkif_request_t *req); int blktap_write_be_ring(blkif_response_t *rsp); -int blktap_read_fe_ring(void); -int blktap_read_be_ring(void); +int blktap_write_ctrl_ring(ctrl_msg_t *msg); /* fe/be ring access functions: */ int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp); diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c index b3cd111897..98a76f15af 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c @@ -10,10 +10,6 @@ #include "blktap.h" -#define BLKIF_STATE_CLOSED 0 -#define BLKIF_STATE_DISCONNECTED 1 -#define BLKIF_STATE_CONNECTED 2 - static char *blkif_state_name[] = { [BLKIF_STATE_CLOSED] = "closed", [BLKIF_STATE_DISCONNECTED] = "disconnected", @@ -26,9 +22,10 @@ static char * blkif_status_name[] = { [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", }; -static unsigned int blkif_pt_state = BLKIF_STATE_CLOSED; -static unsigned blkif_ptbe_irq; -unsigned int blkif_ptbe_evtchn; + +static unsigned blktap_be_irq; +unsigned int blktap_be_state = BLKIF_STATE_CLOSED; +unsigned int blktap_be_evtchn; /*-----[ Control Messages to/from Frontend VMs ]--------------------------*/ @@ -306,7 +303,7 @@ static void blkif_ptbe_disconnect(void) sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL); SHARED_RING_INIT(BLKIF_RING, sring); FRONT_RING_INIT(BLKIF_RING, &blktap_be_ring, sring); - blkif_pt_state = BLKIF_STATE_DISCONNECTED; + blktap_be_state = BLKIF_STATE_DISCONNECTED; DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n"); blkif_ptbe_send_interface_connect(); } @@ -315,10 +312,10 @@ static void blkif_ptbe_connect(blkif_fe_interface_status_t *status) { int err = 0; - blkif_ptbe_evtchn = 
status->evtchn; - blkif_ptbe_irq = bind_evtchn_to_irq(blkif_ptbe_evtchn); + blktap_be_evtchn = status->evtchn; + blktap_be_irq = bind_evtchn_to_irq(blktap_be_evtchn); - err = request_irq(blkif_ptbe_irq, blkif_ptbe_int, + err = request_irq(blktap_be_irq, blkif_ptbe_int, SA_SAMPLE_RANDOM, "blkif", NULL); if ( err ) { WPRINTK("blkfront request_irq failed (%d)\n", err); @@ -326,7 +323,7 @@ static void blkif_ptbe_connect(blkif_fe_interface_status_t *status) } else { /* transtion to connected in case we need to do a a partion probe on a whole disk */ - blkif_pt_state = BLKIF_STATE_CONNECTED; + blktap_be_state = BLKIF_STATE_CONNECTED; } } @@ -334,7 +331,7 @@ static void unexpected(blkif_fe_interface_status_t *status) { WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", blkif_status_name[status->status], - blkif_state_name[blkif_pt_state]); + blkif_state_name[blktap_be_state]); } static void blkif_ptbe_status( @@ -352,7 +349,7 @@ static void blkif_ptbe_status( switch ( status->status ) { case BLKIF_INTERFACE_STATUS_CLOSED: - switch ( blkif_pt_state ) + switch ( blktap_be_state ) { case BLKIF_STATE_CLOSED: unexpected(status); @@ -366,7 +363,7 @@ static void blkif_ptbe_status( break; case BLKIF_INTERFACE_STATUS_DISCONNECTED: - switch ( blkif_pt_state ) + switch ( blktap_be_state ) { case BLKIF_STATE_CLOSED: blkif_ptbe_disconnect(); @@ -380,7 +377,7 @@ static void blkif_ptbe_status( break; case BLKIF_INTERFACE_STATUS_CONNECTED: - switch ( blkif_pt_state ) + switch ( blktap_be_state ) { case BLKIF_STATE_CLOSED: unexpected(status); @@ -398,7 +395,7 @@ static void blkif_ptbe_status( break; case BLKIF_INTERFACE_STATUS_CHANGED: - switch ( blkif_pt_state ) + switch ( blktap_be_state ) { case BLKIF_STATE_CLOSED: case BLKIF_STATE_DISCONNECTED: @@ -440,6 +437,14 @@ void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) case CMSG_BLKIF_BE: + /* send a copy of the message to user if wanted */ + + if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || + (blktap_mode & 
BLKTAP_MODE_COPY_FE) ) { + + blktap_write_ctrl_ring(msg); + } + switch ( msg->subtype ) { case CMSG_BLKIF_BE_CREATE: @@ -500,11 +505,13 @@ void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) ctrl_if_send_response(msg); } -/*-----[ All control messages enter here: ]-------------------------------*/ +/*-----[ Initialization ]-------------------------------------------------*/ void __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 0, 0, NULL, NULL); memset(blkif_hash, 0, sizeof(blkif_hash)); + + blktap_be_ring.sring = NULL; } diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c index 367a83cecc..7bbe36ad89 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c @@ -40,8 +40,6 @@ inline active_req_t *get_active_req(void) spin_lock_irqsave(&active_req_lock, flags); idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)]; ar = &active_reqs[idx]; -if (ar->inuse) WPRINTK("AR INUSE! (%lu)\n", ar->id); -ar->inuse = 1; spin_unlock_irqrestore(&active_req_lock, flags); return ar; @@ -52,7 +50,6 @@ inline void free_active_req(active_req_t *ar) unsigned long flags; spin_lock_irqsave(&active_req_lock, flags); -ar->inuse = 0; active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar); spin_unlock_irqrestore(&active_req_lock, flags); } @@ -97,11 +94,8 @@ inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp) blkif_response_t *resp_d; active_req_t *ar; - /* remap id, and free the active req. 
blkif lookup goes here too.*/ ar = &active_reqs[ID_TO_IDX(rsp->id)]; - /* WPRINTK("%3u > %3lu\n", ID_TO_IDX(rsp->id), ar->id); */ rsp->id = ar->id; - free_active_req(ar); resp_d = RING_GET_RESPONSE(BLKIF_RING, &blkif->blk_ring, blkif->blk_ring.rsp_prod_pvt); @@ -109,6 +103,9 @@ inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp) wmb(); blkif->blk_ring.rsp_prod_pvt++; + blkif_put(ar->blkif); + free_active_req(ar); + return 0; } @@ -116,6 +113,11 @@ inline int write_req_to_be_ring(blkif_request_t *req) { blkif_request_t *req_d; + if ( blktap_be_state != BLKIF_STATE_CONNECTED ) { + WPRINTK("Tap trying to access an unconnected backend!\n"); + return 0; + } + req_d = RING_GET_REQUEST(BLKIF_RING, &blktap_be_ring, blktap_be_ring.req_prod_pvt); memcpy(req_d, req, sizeof(blkif_request_t)); @@ -135,6 +137,9 @@ inline void kick_fe_domain(blkif_t *blkif) inline void kick_be_domain(void) { + if ( blktap_be_state != BLKIF_STATE_CONNECTED ) + return; + wmb(); /* Ensure that the frontend can see the requests. 
*/ RING_PUSH_REQUESTS(BLKIF_RING, &blktap_be_ring); - notify_via_evtchn(blkif_ptbe_evtchn); + notify_via_evtchn(blktap_be_evtchn); @@ -310,6 +315,7 @@ static int do_block_io_op(blkif_t *blkif, int max_to_do) */ ar = get_active_req(); ar->id = req_s->id; + blkif_get(blkif); ar->blkif = blkif; req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar)); /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */ @@ -458,11 +464,13 @@ void print_vm_ring_idxs(void) blkif->blk_ring.sring->req_prod, blkif->blk_ring.sring->rsp_prod); } - WPRINTK("BE Ring: \n--------\n"); - WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d " - "| req_prod: %2d, rsp_prod: %2d\n", - blktap_be_ring.rsp_cons, - blktap_be_ring.req_prod_pvt, - blktap_be_ring.sring->req_prod, - blktap_be_ring.sring->rsp_prod); + if (blktap_be_ring.sring != NULL) { + WPRINTK("BE Ring: \n--------\n"); + WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", + blktap_be_ring.rsp_cons, + blktap_be_ring.req_prod_pvt, + blktap_be_ring.sring->req_prod, + blktap_be_ring.sring->rsp_prod); + } } diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c index 500270259c..1876287a12 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c @@ -19,6 +19,7 @@ #include #include #include +#include /* for control ring. */ #include "blktap.h" @@ -40,6 +41,11 @@ unsigned long rings_vstart; /* Rings up to user space. 
*/ static blkif_front_ring_t blktap_ufe_ring; static blkif_back_ring_t blktap_ube_ring; +static ctrl_front_ring_t blktap_uctrl_ring; + +/* local prototypes */ +static int blktap_read_fe_ring(void); +static int blktap_read_be_ring(void); /* -------[ blktap vm ops ]------------------------------------------- */ @@ -66,16 +72,28 @@ struct vm_operations_struct blktap_vm_ops = { static int blktap_open(struct inode *inode, struct file *filp) { blkif_sring_t *sring; + ctrl_sring_t *csring; if ( test_and_set_bit(0, &blktap_dev_inuse) ) return -EBUSY; printk(KERN_ALERT "blktap open.\n"); + + /* Allocate the ctrl ring. */ + csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL); + if (csring == NULL) + goto fail_nomem; + + SetPageReserved(virt_to_page(csring)); + + SHARED_RING_INIT(CTRL_RING, csring); + FRONT_RING_INIT(CTRL_RING, &blktap_uctrl_ring, csring); + /* Allocate the fe ring. */ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); if (sring == NULL) - goto fail_nomem; + goto fail_free_ctrl; SetPageReserved(virt_to_page(sring)); @@ -95,6 +113,9 @@ static int blktap_open(struct inode *inode, struct file *filp) DPRINTK(KERN_ALERT "blktap open.\n"); return 0; + + fail_free_ctrl: + free_page( (unsigned long) blktap_uctrl_ring.sring); fail_free_fe: free_page( (unsigned long) blktap_ufe_ring.sring); @@ -111,6 +132,9 @@ static int blktap_release(struct inode *inode, struct file *filp) printk(KERN_ALERT "blktap closed.\n"); /* Free the ring page. */ + ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring)); + free_page((unsigned long) blktap_uctrl_ring.sring); + ClearPageReserved(virt_to_page(blktap_ufe_ring.sring)); free_page((unsigned long) blktap_ufe_ring.sring); @@ -120,6 +144,15 @@ static int blktap_release(struct inode *inode, struct file *filp) return 0; } +/* Note on mmap: + * remap_pfn_range sets VM_IO on vma->vm_flags. In trying to make libaio + * work to do direct page access from userspace, this ended up being a + * problem. 
The bigger issue seems to be that there is no way to map + * a foreign page in to user space and have the virtual address of that + * page map sanely down to a mfn. + * Removing the VM_IO flag results in a loop in get_user_pages, as + * pfn_valid() always fails on a foreign page. + */ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) { int size; @@ -148,20 +181,28 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) /* not sure if I really need to do this... */ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring)); + if (remap_pfn_range(vma, vma->vm_start, + __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) { + WPRINTK("ctrl_ring: remap_pfn_range failure!\n"); + } + + DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring)); - if (remap_page_range(vma, vma->vm_start, - __pa(blktap_ube_ring.sring), + if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, + __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) { - WPRINTK("be_ring: remap_page_range failure!\n"); + WPRINTK("be_ring: remap_pfn_range failure!\n"); } DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring)); - if (remap_page_range(vma, vma->vm_start + PAGE_SIZE, - __pa(blktap_ufe_ring.sring), + if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), + __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) { - WPRINTK("fe_ring: remap_page_range failure!\n"); + WPRINTK("fe_ring: remap_pfn_range failure!\n"); } - + blktap_vma = vma; blktap_ring_ok = 1; @@ -211,9 +252,11 @@ static unsigned int blktap_poll(struct file *file, poll_table *wait) { poll_wait(file, &blktap_wait, wait); - if ( RING_HAS_UNPUSHED_REQUESTS(BLKIF_RING, &blktap_ufe_ring) || + if ( RING_HAS_UNPUSHED_REQUESTS(BLKIF_RING, &blktap_uctrl_ring) || + RING_HAS_UNPUSHED_REQUESTS(BLKIF_RING, &blktap_ufe_ring) || 
RING_HAS_UNPUSHED_RESPONSES(BLKIF_RING, &blktap_ube_ring) ) { + RING_PUSH_REQUESTS(BLKIF_RING, &blktap_uctrl_ring); RING_PUSH_REQUESTS(BLKIF_RING, &blktap_ufe_ring); RING_PUSH_RESPONSES(BLKIF_RING, &blktap_ube_ring); return POLLIN | POLLRDNORM; @@ -260,7 +303,6 @@ int blktap_write_fe_ring(blkif_request_t *req) return 0; } - //target = RING_NEXT_EMPTY_REQUEST(BLKIF_RING, &blktap_ufe_ring); target = RING_GET_REQUEST(BLKIF_RING, &blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt); memcpy(target, req, sizeof(*req)); @@ -270,7 +312,7 @@ int blktap_write_fe_ring(blkif_request_t *req) error = direct_remap_area_pages(blktap_vma->vm_mm, MMAP_VADDR(ID_TO_IDX(req->id), i), - target->frame_and_sects[0] & PAGE_MASK, + target->frame_and_sects[i] & PAGE_MASK, PAGE_SIZE, blktap_vma->vm_page_prot, ID_TO_DOM(req->id)); @@ -302,7 +344,6 @@ int blktap_write_be_ring(blkif_response_t *rsp) /* No test for fullness in the response direction. */ - //target = RING_NEXT_EMPTY_RESPONSE(BLKIF_RING, &blktap_ube_ring); target = RING_GET_RESPONSE(BLKIF_RING, &blktap_ube_ring, blktap_ube_ring.rsp_prod_pvt); memcpy(target, rsp, sizeof(*rsp)); @@ -314,7 +355,7 @@ int blktap_write_be_ring(blkif_response_t *rsp) return 0; } -int blktap_read_fe_ring(void) +static int blktap_read_fe_ring(void) { /* This is called to read responses from the UFE ring. */ @@ -329,7 +370,6 @@ int blktap_read_fe_ring(void) if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { /* for each outstanding message on the UFEring */ - //RING_FOREACH_RESPONSE(BLKIF_RING, &blktap_ufe_ring, prod, resp_s) { rp = blktap_ufe_ring.sring->rsp_prod; rmb(); @@ -349,7 +389,7 @@ int blktap_read_fe_ring(void) return 0; } -int blktap_read_be_ring(void) +static int blktap_read_be_ring(void) { /* This is called to read requests from the UBE ring. 
*/ @@ -362,7 +402,6 @@ int blktap_read_be_ring(void) if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) { /* for each outstanding message on the UFEring */ - //RING_FOREACH_REQUEST(BLKIF_RING, &blktap_ube_ring, prod, req_s) { rp = blktap_ube_ring.sring->req_prod; rmb(); for ( i = blktap_ube_ring.req_cons; i != rp; i++ ) @@ -379,6 +418,31 @@ int blktap_read_be_ring(void) return 0; } + +/* Copy a control message onto the user-space ctrl ring; always returns 0. */ +int blktap_write_ctrl_ring(ctrl_msg_t *msg) +{ + ctrl_msg_t *target; + + if ( ! blktap_ring_ok ) { + DPRINTK("blktap: ctrl_ring not ready for a request!\n"); + return 0; + } + + /* NOTE(review): no fullness test is made before writing this request. */ + + target = RING_GET_REQUEST(CTRL_RING, &blktap_uctrl_ring, + blktap_uctrl_ring.req_prod_pvt); + memcpy(target, msg, sizeof(*msg)); + + blktap_uctrl_ring.req_prod_pvt++; + + /* currently treat the ring as unidirectional. */ + blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod; + + return 0; +} + /* -------[ blktap module setup ]------------------------------------- */ static struct miscdevice blktap_miscdev = { -- 2.30.2